#Libraries
library(tidyverse)
library(rmarkdown)
library(RODBC)
library(RODBCext)
library(optparse)
library(data.table)
library(Matrix)
library(outliers)
library(plyr)
library(dplyr)
library(reshape2)
library(pastecs)
library(ggmap)
library(plotly)
library(rgeos)
library(sp)
Set your database connection info
server <- "USINTERN13\\SQLEXPRESS"
database <-"test1"
#startDate <- "20160101"
#endDate <- "20170202"
Reads in unique 108 zones
allNames <- as.data.frame(read.csv("C:/Users/sspinetto/Desktop/allnames.csv", stringsAsFactors = FALSE))
allLats <- as.data.frame(read.csv("C:/Users/sspinetto/Desktop/weatherStationList.csv", stringsAsFactors = FALSE))
f <- function(s) strsplit(s, "_")
names <-sapply(allNames, f)
uniqueNames <- colsplit(allNames$mx_cfe_manzanillo_one_load_act, pattern="_", names = c("0","1","name","4","5","6"))
finalNames <- unique(uniqueNames$name)
finalNames[[53]]<- "acapulco"
finalNames<- sort(finalNames)
This is the function that gets the % of non zeroes for each load type, later it is looped through for each zone
getMyZeros <- function (server, database, finalNames) {
tryCatch({
necName <- finalNames
fileName1 <- paste0("mx_cfe_",necName)
#set connection
setServ <- paste0("driver={SQL Server};server=",server,";database=",database,";trusted_connection=true")
myServer <- odbcDriverConnect(setServ)
# perform queries to zero, one, two, three
query0 <- paste0("select date,time,load_act from ",fileName1,"_zero_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load0 <- sqlQuery(myServer,query0)
query1 <- paste0("select date,time,load_act from ",fileName1,"_three_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load3 <- sqlQuery(myServer,query1)
query2 <- paste0("select date,time,load_act from ",fileName1,"_two_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load2 <- sqlQuery(myServer,query2)
query3 <- paste0("select date,time,load_act from ",fileName1,"_one_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load1 <- sqlQuery(myServer,query3)
# trunc for compare
trunc0 <- head(load0, -4872)
trunc1 <- head(load1, -3864)
trunc2 <- head(load2, -2520)
trunc3 <- load3
#combine
fullLoad <- as.data.frame(cbind(trunc0,trunc1,trunc2,trunc3))
colnames(fullLoad) <- c("date_0", "time_0", "load_act_0","date_1", "time_1", "load_act_1","date_2", "time_2", "load_act_2",
"date_3", "time_3", "load_act_3")
## sanity check to make sure columns align
#
# head(fullLoad)
## clean up
compareLoad <- data.frame(fullLoad$date_0,fullLoad$time_0,fullLoad$load_act_0,
fullLoad$load_act_1,fullLoad$load_act_2,fullLoad$load_act_3)
zeroPercent0 <- mean(!compareLoad$fullLoad.load_act_0)
zeroPercent1 <- mean(!compareLoad$fullLoad.load_act_1)
zeroPercent2 <- mean(!compareLoad$fullLoad.load_act_2)
zeroPercent3 <- mean(!compareLoad$fullLoad.load_act_3)
zeroVector <- c(zeroPercent0,zeroPercent1,zeroPercent2,zeroPercent3)
#
# print (zeroPercent0)
# print (zeroPercent1)
# print (zeroPercent2)
# print (zeroPercent3)
### lets find the differences
compareLoad$dif0to1 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_1
compareLoad$dif1to2 <- compareLoad$fullLoad.load_act_1 - compareLoad$fullLoad.load_act_2
compareLoad$dif2to3 <- compareLoad$fullLoad.load_act_2 - compareLoad$fullLoad.load_act_3
compareLoad$dif0to3 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_3
summary(compareLoad$dif0to1)
summary(compareLoad$dif1to2)
summary(compareLoad$dif2to3)
summary(compareLoad$dif0to3)
}, error= function(e){})
return(zeroVector)
}
here we have our function to get summaries
getMySummaries <- function (server, database, finalNames) {
tryCatch({
fileName1 <- paste0("mx_cfe_", finalNames)
#set connection
setServ <- paste0("driver={SQL Server};server=",server,";database=",database,";trusted_connection=true")
myServer <- odbcDriverConnect(setServ)
# perform queries to zero, one, two, three
query0 <- paste0("select date,time,load_act from ",fileName1,"_zero_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load0 <- sqlQuery(myServer,query0)
query1 <- paste0("select date,time,load_act from ",fileName1,"_three_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load3 <- sqlQuery(myServer,query1)
query2 <- paste0("select date,time,load_act from ",fileName1,"_two_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load2 <- sqlQuery(myServer,query2)
query3 <- paste0("select date,time,load_act from ",fileName1,"_one_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load1 <- sqlQuery(myServer,query3)
# trunc for compare
trunc0 <- head(load0, -4872)
trunc1 <- head(load1, -3864)
trunc2 <- head(load2, -2520)
trunc3 <- load3
#combine
fullLoad <- as.data.frame(cbind(trunc0,trunc1,trunc2,trunc3))
colnames(fullLoad) <- c("date_0", "time_0", "load_act_0","date_1", "time_1", "load_act_1","date_2", "time_2", "load_act_2",
"date_3", "time_3", "load_act_3")
## sanity check to make sure columns align
#
# head(fullLoad)
## clean up
compareLoad <- data.frame(fullLoad$date_0,fullLoad$time_0,fullLoad$load_act_0,
fullLoad$load_act_1,fullLoad$load_act_2,fullLoad$load_act_3)
zeroPercent0 <- mean(!compareLoad$fullLoad.load_act_0)
zeroPercent1 <- mean(!compareLoad$fullLoad.load_act_1)
zeroPercent2 <- mean(!compareLoad$fullLoad.load_act_2)
zeroPercent3 <- mean(!compareLoad$fullLoad.load_act_3)
zeroVector <- c(zeroPercent0,zeroPercent1,zeroPercent2,zeroPercent3)
#
# print (zeroPercent0)
# print (zeroPercent1)
# print (zeroPercent2)
# print (zeroPercent3)
### lets find the differences
compareLoad$dif0to1 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_1
compareLoad$dif1to2 <- compareLoad$fullLoad.load_act_1 - compareLoad$fullLoad.load_act_2
compareLoad$dif2to3 <- compareLoad$fullLoad.load_act_2 - compareLoad$fullLoad.load_act_3
compareLoad$dif0to3 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_3
mean01 <- summary(load0$load_act)
sumVector <- mean01
}, error= function(e){})
return(sumVector)
}
third function to get something else :P
getMyOther <- function (server, database, finalNames) {
tryCatch({
fileName1 <- paste0("mx_cfe_", finalNames)
#set connection
setServ <- paste0("driver={SQL Server};server=",server,";database=",database,";trusted_connection=true")
myServer <- odbcDriverConnect(setServ)
# perform queries to zero, one, two, three
query0 <- paste0("select date,time,load_act from ",fileName1,"_zero_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load0 <- sqlQuery(myServer,query0)
query1 <- paste0("select date,time,load_act from ",fileName1,"_three_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load3 <- sqlQuery(myServer,query1)
query2 <- paste0("select date,time,load_act from ",fileName1,"_two_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load2 <- sqlQuery(myServer,query2)
query3 <- paste0("select date,time,load_act from ",fileName1,"_one_load_act"
# ," where date > ",startDate," AND date < ", endDate #uncomment this line if using custom date set
)
load1 <- sqlQuery(myServer,query3)
# trunc for compare
trunc0 <- head(load0, -4872)
trunc1 <- head(load1, -3864)
trunc2 <- head(load2, -2520)
trunc3 <- load3
#combine
fullLoad <- as.data.frame(cbind(trunc0,trunc1,trunc2,trunc3))
colnames(fullLoad) <- c("date_0", "time_0", "load_act_0","date_1", "time_1", "load_act_1","date_2", "time_2", "load_act_2",
"date_3", "time_3", "load_act_3")
## sanity check to make sure columns align
#
# head(fullLoad)
## clean up
compareLoad <- data.frame(fullLoad$date_0,fullLoad$time_0,fullLoad$load_act_0,
fullLoad$load_act_1,fullLoad$load_act_2,fullLoad$load_act_3)
### lets find the differences
compareLoad$dif0to1 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_1
compareLoad$dif1to2 <- compareLoad$fullLoad.load_act_1 - compareLoad$fullLoad.load_act_2
compareLoad$dif2to3 <- compareLoad$fullLoad.load_act_2 - compareLoad$fullLoad.load_act_3
compareLoad$dif0to3 <- compareLoad$fullLoad.load_act_0 - compareLoad$fullLoad.load_act_3
difVector <- data_frame(compareLoad$fullLoad.date_0, compareLoad$fullLoad.time_0,
compareLoad$fullLoad.load_act_0,compareLoad$fullLoad.load_act_1,compareLoad$fullLoad.load_act_2,
compareLoad$fullLoad.load_act_3,
compareLoad$dif0to1,compareLoad$dif1to2,compareLoad$dif2to3,compareLoad$dif0to3)
check01<- intersect(difVector$`compareLoad$fullLoad.load_act_0`,difVector$`compareLoad$fullLoad.load_act_1`)
check01<- intersect(difVector$`compareLoad$fullLoad.load_act_0`,difVector$`compareLoad$fullLoad.load_act_1`)
check01<- intersect(difVector$`compareLoad$fullLoad.load_act_0`,difVector$`compareLoad$fullLoad.load_act_1`)
length(check)
}, error= function(e){})
return(difVector)
}
First Loop, Calculates Non Zeroes
loop_zero_data <- vector("list", length(finalNames))
for(x in seq_along(finalNames)){
loop_zero_data[[x]] <- getMyZeros(server,database,finalNames[[x]])
}
names(loop_zero_data) <- finalNames
### prints % of zeroes in each data set, load0, load1, load2, load3
head(loop_zero_data)
## $acapulco
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
##
## $acapulco
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
##
## $ags
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
##
## $apatzingan
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
##
## $caborca
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
##
## $camargo
## [1] 0.0990566 0.5330189 0.3254717 0.1745283
second Loop calculates summaries
loop_sum_data <- vector("list", length(finalNames))
for(x in seq_along(finalNames)){
loop_sum_data[[x]] <- getMySummaries(server,database,finalNames[[x]])
}
names(loop_sum_data) <- finalNames
### prints % of zeroes in each data set, load0, load1, load2, load3
head(loop_sum_data)
## $acapulco
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 185.3 212.1 188.6 231.4 495.0
##
## $acapulco
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 185.3 212.1 188.6 231.4 495.0
##
## $ags
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 341.1 403.2 361.7 456.4 711.0
##
## $apatzingan
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 80.71 93.84 86.84 110.03 190.64
##
## $caborca
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 64.92 129.83 172.14 291.75 461.05
##
## $camargo
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 81.57 113.04 111.97 148.98 263.37
third loop 4 for differences between 01, 12, 23, 03
loop_dif_data <- vector("list", length(finalNames))
for(x in seq_along(finalNames)){
loop_dif_data[[x]] <- getMyOther(server,database,finalNames[[x]])
}
names(loop_dif_data) <- finalNames
Put Summary Data in Table 4 plotting
getMedian <- ldply(loop_sum_data)
plot_ly(getMedian, y= ~Median, type='scatter', mode='markers', text= ~.id)
lets see what else we can visualize that’s useful…..
lookupNames <- sapply(finalNames, paste0, " Mexico")
lookupNames2<- sapply(allLats$stationname, paste0, "Mexico")
lonlat <- geocode(lookupNames)
set1<- data.frame(p1=lonlat$lon,p2=lonlat$lat)
lonlat2<-geocode(lookupNames2)
set2<- data.frame(p1=lonlat2$lon,p2=lonlat2$lat)
cities <- cbind(finalNames, lonlat)
myPlace <- "Mexico"
myZoom <- 5
myMap <- get_map(location=myPlace, zoom=myZoom)
mapper <- ggmap(myMap)
mapper+ geom_point(data=cities, aes(x=lon, y=lat), size=2,colour= "red")+geom_text(data =cities,aes(label=.id),hjust=0, vjust=0, size=2)

What’s next !? – lets find closest weather station to each!
distp1p2 <- function(p1,p2) {
dst <- sqrt((p1[1]-p2[1])^2+(p1[2]-p2[2])^2)
return(dst)
}
dist2b <- function(y) which.min(apply(set2, 1, function(x) min(distp1p2(x,y))))
bestStation<- apply(set1, 1, dist2b)
stationKey<- data.frame(allLats$stationid,allLats$stationname)
head(bestStation)
## [[1]]
## [1] 21
##
## [[2]]
## [1] 21
##
## [[3]]
## [1] 25
##
## [[4]]
## [1] 20
##
## [[5]]
## [1] 26
##
## [[6]]
## [1] 60
head(stationKey)
## allLats.stationid allLats.stationname
## 1 2076910 Bahias De Huatulco
## 2 2076919 Chichen-Itza
## 3 2076918 Ciudad Del Carmen
## 4 2076911 Ciudad Juarez
## 5 2076922 Del Bajio/Leon
## 6 2076912 Guaymas